In [1]:
!pwd
/geschwindlabshares/AMRF_Riki/2021/2021-Pancortical_scRNAseq_Brie/Pegasus_072721_highRes_ngene750
In [2]:
import pegasus as pg
data=pg.read_input("../Pegasus_072621/sample.main.lowRes.rmDB.rmNA.zarr")
2021-07-27 11:01:09,483 - pegasusio.readwrite - INFO - zarr file '../Pegasus_072621/sample.main.lowRes.rmDB.rmNA.zarr' is loaded.
2021-07-27 11:01:09,486 - pegasusio.readwrite - INFO - Function 'read_input' finished in 107.37s.
In [3]:
print(data.obs['Channel'].value_counts())
print(data.obs['anno'].value_counts())
4787_BA17    28612
8792_BA17    22122
0679_BA17    15914
5144_BA17    14316
WQ16         13826
46BW         12517
WQ1          11280
WQ6          11184
WQ20         10421
WQ11          9738
WQ7           8981
WQ13          8685
76BW          8038
WQ8           6476
WQ17          6261
66BW          5800
WQ10          5655
WQ12          5543
WQ19          5255
60BW          5182
WQ4           4501
WQ2           3907
2BW           3814
31BW          3544
WQ3           3442
WQ5           3243
WQ15          2502
WQ14          2008
WQ18          1970
3BW           1926
WQ9           1577
22BW          1323
8BW            970
4BW            806
5BW            783
6BW            623
13BW           566
12BW           524
7BW            486
Name: Channel, dtype: int64
ODC1     34539
INH1     25280
EX1      24493
ODC2     24035
EX2      21994
EX3      21030
AST1     20142
EX4      19504
MG       18412
OPC      17405
END       9514
AST3      6456
AST2      4680
EX5       4580
MG2       2257
OPC9         0
OPC8         0
OPC7         0
NA           0
NA2          0
EX8          0
EX7          0
EX6          0
OPC10        0
Name: anno, dtype: int64
In [4]:
pg.qc_metrics(data,min_genes=750,percent_mito=10,max_genes=6000,mito_prefix='MT')
pg.qcviolin(data,plot_type='gene',dpi=75,n_violin_per_panel=40,panel_size=(12,6) )
pg.qcviolin(data,plot_type='mito',dpi=75,n_violin_per_panel=40,panel_size=(12,6) )
In [5]:
pg.filter_data(data)
pg.identify_robust_genes(data)
pg.log_norm(data)
pg.highly_variable_features(data, consider_batch=True,n_top=3000)
2021-07-27 11:01:25,757 - pegasusio.qc_utils - INFO - After filtration, 199617 out of 254321 cell barcodes are kept in UnimodalData object Hg38-rna.
2021-07-27 11:01:25,758 - pegasus.tools.preprocessing - INFO - Function 'filter_data' finished in 4.67s.
2021-07-27 11:01:40,979 - pegasus.tools.preprocessing - INFO - After filtration, 34978/35412 genes are kept. Among 34978 genes, 27574 genes are robust.
2021-07-27 11:01:40,981 - pegasus.tools.preprocessing - INFO - Function 'identify_robust_genes' finished in 15.22s.
2021-07-27 11:01:43,159 - pegasus.tools.preprocessing - WARNING - Rerun log-normalization. Use the raw counts in backup instead.
2021-07-27 11:01:59,964 - pegasus.tools.preprocessing - INFO - Function 'log_norm' finished in 18.98s.
2021-07-27 11:02:05,367 - pegasus.tools.hvf_selection - INFO - Function 'estimate_feature_statistics' finished in 5.40s.
2021-07-27 11:02:05,471 - pegasus.tools.hvf_selection - INFO - 3000 highly variable features have been selected.
2021-07-27 11:02:05,473 - pegasus.tools.hvf_selection - INFO - Function 'highly_variable_features' finished in 5.51s.
In [6]:
pg.pca(data,n_components=65)
pg.regress_out(data, attrs=['n_genes', 'percent_mito'])
2021-07-27 11:03:20,808 - pegasus.tools.preprocessing - INFO - Function 'pca' finished in 75.33s.
2021-07-27 11:03:22,897 - pegasus.tools.preprocessing - INFO - Function 'regress_out' finished in 2.08s.
Out[6]:
'pca_regressed'
In [7]:
harmony_key = pg.run_harmony(data)
pg.neighbors(data,rep=harmony_key)
2021-07-27 11:03:24,554 - pegasus.tools.batch_correction - INFO - Start integration using Harmony.
	Initialization is completed.
	Completed 1 / 10 iteration(s).
	Completed 2 / 10 iteration(s).
	Completed 3 / 10 iteration(s).
Reach convergence after 3 iteration(s).
2021-07-27 11:58:12,603 - pegasus.tools.batch_correction - INFO - Function 'run_harmony' finished in 3289.69s.
2021-07-27 11:59:49,616 - pegasus.tools.nearest_neighbors - INFO - Function 'get_neighbors' finished in 97.01s.
2021-07-27 12:00:07,700 - pegasus.tools.nearest_neighbors - INFO - Function 'calculate_affinity_matrix' finished in 18.08s.
In [8]:
pg.louvain(data,rep=harmony_key,resolution=1.3)
pg.umap(data,rep=harmony_key)
pg.scatter(data, attrs=['louvain_labels', 'Channel'], basis='umap')
2021-07-27 12:00:20,971 - pegasus.tools.graph_operations - INFO - Function 'construct_graph' finished in 13.24s.
2021-07-27 12:07:40,261 - pegasus.tools.clustering - INFO - Louvain clustering is done. Get 26 clusters.
2021-07-27 12:07:40,768 - pegasus.tools.clustering - INFO - Function 'louvain' finished in 453.05s.
2021-07-27 12:07:40,770 - pegasus.tools.nearest_neighbors - INFO - Found cached kNN results, no calculation is required.
2021-07-27 12:07:40,771 - pegasus.tools.nearest_neighbors - INFO - Function 'get_neighbors' finished in 0.00s.
2021-07-27 12:07:40,879 - pegasus.tools.visualization - INFO - UMAP(dens_frac=0.0, dens_lambda=0.0, min_dist=0.5, random_state=0, verbose=True)
2021-07-27 12:07:40,882 - pegasus.tools.visualization - INFO - Construct fuzzy simplicial set
2021-07-27 12:07:46,841 - pegasus.tools.visualization - INFO - Construct embedding
	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epochs
	completed  100  /  200 epochs
	completed  120  /  200 epochs
	completed  140  /  200 epochs
	completed  160  /  200 epochs
	completed  180  /  200 epochs
2021-07-27 12:14:18,831 - pegasus.tools.visualization - INFO - Function 'umap' finished in 398.06s.
In [9]:
pg.scatter(data, attrs=['Diagnosis','Sex'], basis='umap')
pg.scatter(data, attrs=['Age','BA'], basis='umap')
pg.scatter(data, attrs=['BrainRegion','n_genes', 'percent_mito'], basis='umap')
pg.scatter(data, attrs=['louvain_labels'], basis='umap',legend_loc='on data',dpi=200)
pg.scatter(data, attrs=['percent_mito', 'n_genes'], basis='umap',dpi=100)
In [10]:
pg.de_analysis(data, cluster='louvain_labels')
2021-07-27 12:15:16,837 - pegasus.tools.diff_expr - INFO - CSR matrix is converted to CSC matrix. Time spent = 29.5547s.
2021-07-27 12:26:21,944 - pegasus.tools.diff_expr - INFO - MWU test and AUROC calculation are finished. Time spent = 665.1069s.
2021-07-27 12:26:23,816 - pegasus.tools.diff_expr - INFO - Sufficient statistics are collected. Time spent = 1.8729s.
2021-07-27 12:26:25,546 - pegasus.tools.diff_expr - INFO - Differential expression analysis is finished.
2021-07-27 12:26:25,547 - pegasus.tools.diff_expr - INFO - Function 'de_analysis' finished in 698.27s.
In [11]:
celltype_dict = pg.infer_cell_types(data, markers = 'human_brain',output_file='annot.txt')
In [12]:
!cat annot.txt
Cluster 1:
    name: Oligodendrocyte; score: 1.00; average marker percentage: 80.80%; strong support: (PLP1+,95.85%),(MAG+,47.79%),(MBP+,98.76%)
Cluster 2:
    name: Astrocyte; score: 1.00; average marker percentage: 66.16%; strong support: (SLC1A3+,97.42%),(GFAP+,42.67%),(APOE+,48.23%),(SLC1A2+,98.76%),(SLC14A1+,43.72%)
    name: Mural; score: 1.00; average marker percentage: 28.82%; strong support: (PDGFRB+,28.82%)
Cluster 3:
    name: Oligodendrocyte; score: 1.00; average marker percentage: 86.44%; strong support: (PLP1+,98.13%),(MAG+,61.38%),(MBP+,99.80%)
Cluster 4:
    name: Glutamatergic neuron; score: 1.00; average marker percentage: 86.48%; strong support: (RBFOX3+,91.03%),(MEG3+,99.90%),(SLC17A7+,68.50%)
    name: Mural; score: 0.53; average marker percentage: 11.81%; weak support: (PDGFRB+,11.81%)
    name: GABAergic neuron; score: 0.52; average marker percentage: 79.08%; strong support: (RBFOX3+,91.03%),(MEG3+,99.90%); weak support: (GRIK1+,46.31%)
Cluster 5:
    name: Glutamatergic neuron; score: 1.00; average marker percentage: 88.12%; strong support: (RBFOX3+,93.65%),(MEG3+,99.95%),(SLC17A7+,70.77%)
Cluster 6:
    name: Glutamatergic neuron; score: 0.88; average marker percentage: 59.04%; strong support: (MEG3+,95.16%),(SLC17A7+,38.37%); weak support: (RBFOX3+,43.60%)
Cluster 7:
    name: Microglia; score: 1.00; average marker percentage: 59.86%; strong support: (TGFBR1+,48.10%),(DOCK8+,89.65%),(CD74+,31.49%),(CSF1R+,66.54%),(MS4A6A+,25.52%),(PLXDC2+,97.85%)
Cluster 8:
    name: OPC; score: 1.00; average marker percentage: 86.21%; strong support: (PDGFRA+,79.60%),(VCAN+,92.81%)
Cluster 9:
    name: Oligodendrocyte; score: 1.00; average marker percentage: 67.39%; strong support: (PLP1+,83.47%),(MAG+,25.92%),(MBP+,92.77%)
Cluster 10:
    name: GABAergic neuron; score: 1.00; average marker percentage: 73.56%; strong support: (RBFOX3+,78.89%),(MEG3+,99.57%),(GAD1+,72.73%),(GAD2+,51.60%),(GRIK1+,65.01%)
        name: Gabaergic VIP interneuron; score: 1.00; average marker percentage: 37.34%; strong support: (VIP+,37.34%)
        name: GABAergic SST interneuron; score: 0.52; average marker percentage: 5.00%; weak support: (SST+,5.00%)
    name: Glutamatergic neuron; score: 0.67; average marker percentage: 89.23%; strong support: (RBFOX3+,78.89%),(MEG3+,99.57%)
Cluster 11:
    name: Glutamatergic neuron; score: 1.00; average marker percentage: 91.11%; strong support: (RBFOX3+,95.53%),(MEG3+,99.84%),(SLC17A7+,77.96%)
    name: Mural; score: 0.53; average marker percentage: 12.13%; weak support: (PDGFRB+,12.13%)
Cluster 12:
    name: GABAergic neuron; score: 1.00; average marker percentage: 82.96%; strong support: (RBFOX3+,81.85%),(MEG3+,98.69%),(GAD1+,75.50%),(GAD2+,59.59%),(GRIK1+,99.19%)
        name: GABAergic SST interneuron; score: 1.00; average marker percentage: 34.04%; strong support: (SST+,34.04%)
        name: Gabaergic PVALB interneuron; score: 0.50; average marker percentage: 4.95%; weak support: (PVALB+,4.95%)
    name: Glutamatergic neuron; score: 0.67; average marker percentage: 90.27%; strong support: (RBFOX3+,81.85%),(MEG3+,98.69%)
Cluster 13:
    name: GABAergic neuron; score: 1.00; average marker percentage: 79.60%; strong support: (RBFOX3+,71.53%),(MEG3+,99.63%),(GAD1+,81.76%),(GAD2+,84.74%),(GRIK1+,60.33%)
        name: Gabaergic PVALB interneuron; score: 1.00; average marker percentage: 34.62%; strong support: (PVALB+,34.62%)
    name: Glutamatergic neuron; score: 0.67; average marker percentage: 85.58%; strong support: (RBFOX3+,71.53%),(MEG3+,99.63%)
Cluster 14:
    name: Astrocyte; score: 1.00; average marker percentage: 69.03%; strong support: (SLC1A3+,84.76%),(GFAP+,80.40%),(APOE+,43.46%),(SLC1A2+,86.96%),(SLC14A1+,49.57%)
    name: Mural; score: 0.82; average marker percentage: 16.69%; weak support: (PDGFRB+,16.69%)
    name: OPC; score: 0.50; average marker percentage: 65.30%; strong support: (VCAN+,65.30%)
Cluster 15:
    name: Endothelial; score: 1.00; average marker percentage: 91.29%; strong support: (FLT1+,95.25%),(CLDN5+,87.33%)
    name: Mural; score: 1.00; average marker percentage: 26.00%; strong support: (PDGFRB+,26.00%)
Cluster 16:
    name: Glutamatergic neuron; score: 1.00; average marker percentage: 88.32%; strong support: (RBFOX3+,86.55%),(MEG3+,99.55%),(SLC17A7+,78.85%)
Cluster 17:
    name: Glutamatergic neuron; score: 1.00; average marker percentage: 91.19%; strong support: (RBFOX3+,94.63%),(MEG3+,99.97%),(SLC17A7+,78.97%)
Cluster 18:
    name: Glutamatergic neuron; score: 1.00; average marker percentage: 86.98%; strong support: (RBFOX3+,89.23%),(MEG3+,100.00%),(SLC17A7+,71.72%)
Cluster 19:
    name: GABAergic neuron; score: 1.00; average marker percentage: 89.63%; strong support: (RBFOX3+,89.01%),(MEG3+,98.70%),(GAD1+,77.30%),(GAD2+,89.66%),(GRIK1+,93.48%)
        name: GABAergic SST interneuron; score: 0.55; average marker percentage: 5.63%; weak support: (SST+,5.63%)
    name: OPC; score: 0.78; average marker percentage: 33.25%; strong support: (PDGFRA+,42.64%); weak support: (VCAN+,23.86%)
    name: Glutamatergic neuron; score: 0.67; average marker percentage: 93.85%; strong support: (RBFOX3+,89.01%),(MEG3+,98.70%)
Cluster 20:
    name: Mural; score: 1.00; average marker percentage: 57.35%; strong support: (PDGFRB+,57.35%)
    name: Endothelial; score: 0.73; average marker percentage: 10.71%; weak support: (FLT1+,13.93%),(CLDN5+,7.48%)
Cluster 21:
    name: Oligodendrocyte; score: 1.00; average marker percentage: 73.06%; strong support: (PLP1+,85.99%),(MAG+,41.05%),(MBP+,92.16%)
    name: Astrocyte; score: 0.99; average marker percentage: 52.07%; strong support: (SLC1A3+,92.35%),(GFAP+,31.92%),(SLC1A2+,82.38%),(SLC14A1+,20.95%); weak support: (APOE+,32.73%)
    name: Mural; score: 0.92; average marker percentage: 22.76%; weak support: (PDGFRB+,22.76%)
    name: Microglia; score: 0.87; average marker percentage: 34.24%; strong support: (DOCK8+,42.90%),(CSF1R+,30.17%),(PLXDC2+,87.98%); weak support: (TGFBR1+,27.46%),(CD74+,10.74%),(MS4A6A+,6.18%)
Cluster 22:
    name: Glutamatergic neuron; score: 1.00; average marker percentage: 89.34%; strong support: (RBFOX3+,94.55%),(MEG3+,100.00%),(SLC17A7+,73.47%)
Cluster 23:
    name: Glutamatergic neuron; score: 1.00; average marker percentage: 81.95%; strong support: (RBFOX3+,81.71%),(MEG3+,100.00%),(SLC17A7+,64.13%)
Cluster 24:
    name: Glutamatergic neuron; score: 1.00; average marker percentage: 85.49%; strong support: (RBFOX3+,92.94%),(MEG3+,99.73%),(SLC17A7+,63.78%)
    name: Oligodendrocyte; score: 1.00; average marker percentage: 80.51%; strong support: (PLP1+,97.47%),(MAG+,44.47%),(MBP+,99.60%)
    name: GABAergic neuron; score: 0.70; average marker percentage: 67.38%; strong support: (RBFOX3+,92.94%),(MEG3+,99.73%),(GRIK1+,60.32%); weak support: (GAD2+,16.51%)
    name: Mural; score: 0.52; average marker percentage: 11.85%; weak support: (PDGFRB+,11.85%)
Cluster 25:
    name: GABAergic neuron; score: 0.60; average marker percentage: 80.37%; strong support: (MEG3+,99.86%),(GAD1+,63.39%),(GRIK1+,77.87%)
        name: Gabaergic PVALB interneuron; score: 1.00; average marker percentage: 47.68%; strong support: (PVALB+,47.68%)
Cluster 26:
    name: OPC; score: 0.90; average marker percentage: 55.47%; strong support: (VCAN+,92.52%); weak support: (PDGFRA+,18.42%)
    name: Oligodendrocyte; score: 0.67; average marker percentage: 82.55%; strong support: (PLP1+,83.66%),(MBP+,81.44%)
In [13]:
!sed -e ':a' -e 'N' -e '$!ba' -e 's/\:\n    name/ /g' annot.txt|sed  's/    name\(.*\)//g'|awk NF|sed 's/\;\(.*\)//g'
Cluster 1 : Oligodendrocyte
Cluster 2 : Astrocyte
Cluster 3 : Oligodendrocyte
Cluster 4 : Glutamatergic neuron
Cluster 5 : Glutamatergic neuron
Cluster 6 : Glutamatergic neuron
Cluster 7 : Microglia
Cluster 8 : OPC
Cluster 9 : Oligodendrocyte
Cluster 10 : GABAergic neuron
Cluster 11 : Glutamatergic neuron
Cluster 12 : GABAergic neuron
Cluster 13 : GABAergic neuron
Cluster 14 : Astrocyte
Cluster 15 : Endothelial
Cluster 16 : Glutamatergic neuron
Cluster 17 : Glutamatergic neuron
Cluster 18 : Glutamatergic neuron
Cluster 19 : GABAergic neuron
Cluster 20 : Mural
Cluster 21 : Oligodendrocyte
Cluster 22 : Glutamatergic neuron
Cluster 23 : Glutamatergic neuron
Cluster 24 : Glutamatergic neuron
Cluster 25 : GABAergic neuron
Cluster 26 : OPC
In [14]:
pg.write_output(data,"sample.main.highRes.zarr")
2021-07-27 12:26:44,732 - pegasusio.readwrite - INFO - zarr file 'sample.main.highRes.zarr' is written.
2021-07-27 12:26:44,735 - pegasusio.readwrite - INFO - Function 'write_output' finished in 17.98s.
calc_mwu finished for genes in [30610, 30883).
calc_mwu finished for genes in [1644, 1918).
calc_mwu finished for genes in [29518, 29791).
calc_mwu finished for genes in [2192, 2466).
calc_mwu finished for genes in [7672, 7946).
calc_mwu finished for genes in [32248, 32521).
calc_mwu finished for genes in [30337, 30610).
calc_mwu finished for genes in [31702, 31975).
calc_mwu finished for genes in [4384, 4658).
calc_mwu finished for genes in [5480, 5754).
calc_mwu finished for genes in [32794, 33067).
calc_mwu finished for genes in [30883, 31156).
calc_mwu finished for genes in [13957, 14230).
calc_mwu finished for genes in [15868, 16141).
calc_mwu finished for genes in [8494, 8768).
calc_mwu finished for genes in [3014, 3288).
calc_mwu finished for genes in [13138, 13411).
calc_mwu finished for genes in [5206, 5480).
calc_mwu finished for genes in [9589, 9862).
calc_mwu finished for genes in [25696, 25969).
calc_mwu finished for genes in [18598, 18871).
calc_mwu finished for genes in [28153, 28426).
calc_mwu finished for genes in [8768, 9042).
calc_mwu finished for genes in [20236, 20509).
calc_mwu finished for genes in [28699, 28972).
calc_mwu finished for genes in [27880, 28153).
calc_mwu finished for genes in [17779, 18052).
calc_mwu finished for genes in [11773, 12046).
calc_mwu finished for genes in [12319, 12592).
calc_mwu finished for genes in [18871, 19144).
calc_mwu finished for genes in [30064, 30337).
calc_mwu finished for genes in [20509, 20782).
calc_mwu finished for genes in [274, 548).
calc_mwu finished for genes in [10408, 10681).
calc_mwu finished for genes in [14776, 15049).
calc_mwu finished for genes in [16141, 16414).
calc_mwu finished for genes in [1918, 2192).
calc_mwu finished for genes in [14503, 14776).
calc_mwu finished for genes in [28426, 28699).
calc_mwu finished for genes in [24331, 24604).
calc_mwu finished for genes in [31429, 31702).
calc_mwu finished for genes in [22420, 22693).
calc_mwu finished for genes in [1096, 1370).
calc_mwu finished for genes in [822, 1096).
calc_mwu finished for genes in [9042, 9316).
calc_mwu finished for genes in [21328, 21601).
calc_mwu finished for genes in [32521, 32794).
calc_mwu finished for genes in [548, 822).
calc_mwu finished for genes in [27607, 27880).
calc_mwu finished for genes in [2740, 3014).
calc_mwu finished for genes in [3288, 3562).
calc_mwu finished for genes in [25969, 26242).
calc_mwu finished for genes in [19690, 19963).
calc_mwu finished for genes in [26242, 26515).
calc_mwu finished for genes in [23785, 24058).
calc_mwu finished for genes in [31156, 31429).
calc_mwu finished for genes in [27334, 27607).
calc_mwu finished for genes in [19144, 19417).
calc_mwu finished for genes in [5754, 6028).
calc_mwu finished for genes in [27061, 27334).
calc_mwu finished for genes in [24604, 24877).
calc_mwu finished for genes in [21601, 21874).
calc_mwu finished for genes in [4932, 5206).
calc_mwu finished for genes in [12865, 13138).
calc_mwu finished for genes in [16414, 16687).
calc_mwu finished for genes in [16960, 17233).
calc_mwu finished for genes in [22966, 23239).
calc_mwu finished for genes in [14230, 14503).
calc_mwu finished for genes in [31975, 32248).
calc_mwu finished for genes in [11500, 11773).
calc_mwu finished for genes in [33340, 33613).
calc_mwu finished for genes in [33886, 34159).
calc_mwu finished for genes in [6850, 7124).
calc_mwu finished for genes in [6302, 6576).
calc_mwu finished for genes in [7124, 7398).
calc_mwu finished for genes in [9862, 10135).
calc_mwu finished for genes in [29791, 30064).
calc_mwu finished for genes in [12046, 12319).
calc_mwu finished for genes in [33067, 33340).
calc_mwu finished for genes in [26515, 26788).
calc_mwu finished for genes in [7946, 8220).
calc_mwu finished for genes in [17506, 17779).
calc_mwu finished for genes in [9316, 9589).
calc_mwu finished for genes in [0, 274).
calc_mwu finished for genes in [1370, 1644).
calc_mwu finished for genes in [8220, 8494).
calc_mwu finished for genes in [33613, 33886).
calc_mwu finished for genes in [18325, 18598).
calc_mwu finished for genes in [34159, 34432).
calc_mwu finished for genes in [10954, 11227).
calc_mwu finished for genes in [2466, 2740).
calc_mwu finished for genes in [13411, 13684).
calc_mwu finished for genes in [10135, 10408).
calc_mwu finished for genes in [21055, 21328).
calc_mwu finished for genes in [16687, 16960).
calc_mwu finished for genes in [21874, 22147).
calc_mwu finished for genes in [24058, 24331).
calc_mwu finished for genes in [4110, 4384).
calc_mwu finished for genes in [23512, 23785).
calc_mwu finished for genes in [34705, 34978).
calc_mwu finished for genes in [18052, 18325).
calc_mwu finished for genes in [26788, 27061).
calc_mwu finished for genes in [15322, 15595).
calc_mwu finished for genes in [20782, 21055).
calc_mwu finished for genes in [28972, 29245).
calc_mwu finished for genes in [19417, 19690).
calc_mwu finished for genes in [15595, 15868).
calc_mwu finished for genes in [34432, 34705).
calc_mwu finished for genes in [25150, 25423).
calc_mwu finished for genes in [19963, 20236).
calc_mwu finished for genes in [10681, 10954).
calc_mwu finished for genes in [11227, 11500).
calc_mwu finished for genes in [22693, 22966).
calc_mwu finished for genes in [24877, 25150).
calc_mwu finished for genes in [17233, 17506).
calc_mwu finished for genes in [29245, 29518).
calc_mwu finished for genes in [25423, 25696).
calc_mwu finished for genes in [6028, 6302).
calc_mwu finished for genes in [3562, 3836).
calc_mwu finished for genes in [12592, 12865).
calc_mwu finished for genes in [4658, 4932).
calc_mwu finished for genes in [15049, 15322).
calc_mwu finished for genes in [23239, 23512).
calc_mwu finished for genes in [7398, 7672).
calc_mwu finished for genes in [22147, 22420).
calc_mwu finished for genes in [3836, 4110).
calc_mwu finished for genes in [13684, 13957).
calc_mwu finished for genes in [6576, 6850).
In [32]:
anno_dict=dict({'1':'ODC1','2':'AST1','3':'ODC2','4':'EX1','5':'EX2','6':'EX3','7':'MG1','8':'OPC1','9':'ODC3'
                ,'10':'INH1','11':'EX4','12':'INH2','13':'INH3','14':'AST2','15':'END1','16':'EX5','17':'EX6'
                ,'18':'EX7','19':'INH4','20':'Mural','21':'ODC3','22':'EX7','23':'EX8','24':'EX9','25':'INH5'
                ,'26':'OPC2'})

pg.annotate(data, name='anno', based_on='louvain_labels', anno_dict=anno_dict)
data.obs['anno'].value_counts()

pg.scatter(data, attrs='anno', basis='umap',legend_loc='on data',dpi=150)
In [33]:
pg.compo_plot(data,groupby='Diagnosis',condition='anno',dpi=70)
pg.compo_plot(data,groupby='Age',condition='anno',dpi=70)
pg.compo_plot(data,groupby='BrainRegion',condition='anno',dpi=70)
pg.compo_plot(data,groupby='Sex',condition='anno',dpi=70)
In [34]:
import pegasusio as pgio
pgio.write_output(data,"highRes.h5ad",file_type='h5ad')
2021-07-27 13:36:07,180 - pegasusio.readwrite - INFO - h5ad file 'highRes.h5ad' is written.
2021-07-27 13:36:07,184 - pegasusio.readwrite - INFO - Function 'write_output' finished in 457.86s.
In [35]:
# cell composition
import pandas as pd
df_summary=pd.crosstab(data.obs['anno'], data.obs['Diagnosis'])
df_summary_percent=df_summary/df_summary.sum()*100
A=round(df_summary_percent,2)
A
Out[35]:
Diagnosis ASD CTL
anno
AST1 10.79 7.69
AST2 2.72 1.80
END1 2.35 1.98
EX1 6.37 10.40
EX2 7.38 8.14
EX3 5.29 9.70
EX4 2.83 3.30
EX5 2.35 1.90
EX6 1.71 2.05
EX7 1.64 3.06
EX8 0.41 0.44
EX9 0.09 0.65
INH1 3.56 3.61
INH2 2.55 3.38
INH3 2.12 2.78
INH4 1.49 1.44
INH5 0.35 0.39
MG1 9.57 4.68
Mural 1.50 1.21
ODC1 9.83 9.23
ODC2 9.22 7.93
ODC3 7.81 7.63
OPC1 7.62 6.32
OPC2 0.46 0.26
In [36]:
A.to_csv("Cell_composition.csv")
In [40]:
!sbatch --export h5ad=highRes.h5ad /geschwindlabshares/AMRF_Riki/ASD_scRNAseq_Brie/Pegasus_rmDB/Convert2Seurat.sh 
Submitted batch job 117444
In [37]:
# cell composition
import pandas as pd
df_summary=pd.crosstab(data.obs['anno'], data.obs['BrainRegion'])
df_summary_percent=df_summary/df_summary.sum()*100
A=round(df_summary_percent,2)
A
Out[37]:
BrainRegion Occipital PFC Parietal
anno
AST1 9.30 8.99 9.18
AST2 2.34 2.51 1.90
END1 2.26 2.50 1.72
EX1 12.62 2.50 3.23
EX2 8.53 5.23 7.78
EX3 5.12 16.00 7.13
EX4 3.30 1.92 3.33
EX5 1.75 1.20 3.53
EX6 2.03 1.39 1.89
EX7 2.58 2.05 2.10
EX8 0.34 0.44 0.61
EX9 0.61 0.09 0.05
INH1 3.37 3.15 4.34
INH2 3.05 2.60 3.06
INH3 2.49 2.23 2.51
INH4 1.32 1.44 1.81
INH5 0.34 0.43 0.39
MG1 6.02 7.61 9.06
Mural 1.12 1.34 1.86
ODC1 10.00 9.00 8.84
ODC2 8.14 9.79 8.69
ODC3 6.81 10.16 8.07
OPC1 6.23 7.04 8.48
OPC2 0.32 0.40 0.42
In [38]:
A.to_csv("Cell_composition_brainRegion.csv")
In [41]:
marker_genes=['BMP4','PAK4','MLC1','GJB6','AQP4','ACSBG1','FLT1','CLDN5','TM4SF1','PGLYRP1','RGS5'
             ,'GAD1','GAD2','SLC6A5','PNOC','SLC17A6','SLC17A7','NRN1','C1QB','CD68','AIF1','MOG','OLIG1','OLIG2','MAG','SOX10','PDGFRA','CSPG4']
pg.heatmap(data, attrs=marker_genes, groupby='anno',switch_axes=True, dpi=80)